1 package org.apache.lucene.search;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import java.io.IOException;
21 import java.util.LinkedList;
22
23 import org.apache.lucene.analysis.CannedTokenStream;
24 import org.apache.lucene.analysis.Token;
25 import org.apache.lucene.document.Document;
26 import org.apache.lucene.document.Field;
27 import org.apache.lucene.document.TextField;
28 import org.apache.lucene.index.DirectoryReader;
29 import org.apache.lucene.index.IndexReader;
30 import org.apache.lucene.index.IndexWriter;
31 import org.apache.lucene.index.IndexWriterConfig;
32 import org.apache.lucene.index.MultiFields;
33 import org.apache.lucene.index.RandomIndexWriter;
34 import org.apache.lucene.index.Term;
35 import org.apache.lucene.index.TermsEnum;
36 import org.apache.lucene.search.similarities.DefaultSimilarity;
37 import org.apache.lucene.store.Directory;
38 import org.apache.lucene.store.RAMDirectory;
39 import org.apache.lucene.util.BytesRef;
40 import org.apache.lucene.util.LuceneTestCase;
41 import org.junit.Ignore;
42
43
44
45
46
47
48 public class TestMultiPhraseQuery extends LuceneTestCase {
49
50 public void testPhrasePrefix() throws IOException {
51 Directory indexStore = newDirectory();
52 RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
53 add("blueberry pie", writer);
54 add("blueberry strudel", writer);
55 add("blueberry pizza", writer);
56 add("blueberry chewing gum", writer);
57 add("bluebird pizza", writer);
58 add("bluebird foobar pizza", writer);
59 add("piccadilly circus", writer);
60
61 IndexReader reader = writer.getReader();
62 IndexSearcher searcher = newSearcher(reader);
63
64
65 MultiPhraseQuery query1 = new MultiPhraseQuery();
66
67 MultiPhraseQuery query2 = new MultiPhraseQuery();
68 query1.add(new Term("body", "blueberry"));
69 query2.add(new Term("body", "strawberry"));
70
71 LinkedList<Term> termsWithPrefix = new LinkedList<>();
72
73
74 String prefix = "pi";
75 TermsEnum te = MultiFields.getFields(reader).terms("body").iterator();
76 te.seekCeil(new BytesRef(prefix));
77 do {
78 String s = te.term().utf8ToString();
79 if (s.startsWith(prefix)) {
80 termsWithPrefix.add(new Term("body", s));
81 } else {
82 break;
83 }
84 } while (te.next() != null);
85
86 query1.add(termsWithPrefix.toArray(new Term[0]));
87 assertEquals("body:\"blueberry (piccadilly pie pizza)\"", query1.toString());
88 query2.add(termsWithPrefix.toArray(new Term[0]));
89 assertEquals("body:\"strawberry (piccadilly pie pizza)\"", query2
90 .toString());
91
92 ScoreDoc[] result;
93 result = searcher.search(query1, 1000).scoreDocs;
94 assertEquals(2, result.length);
95 result = searcher.search(query2, 1000).scoreDocs;
96 assertEquals(0, result.length);
97
98
99 MultiPhraseQuery query3 = new MultiPhraseQuery();
100 termsWithPrefix.clear();
101 prefix = "blue";
102 te.seekCeil(new BytesRef(prefix));
103
104 do {
105 if (te.term().utf8ToString().startsWith(prefix)) {
106 termsWithPrefix.add(new Term("body", te.term().utf8ToString()));
107 }
108 } while (te.next() != null);
109
110 query3.add(termsWithPrefix.toArray(new Term[0]));
111 query3.add(new Term("body", "pizza"));
112
113 result = searcher.search(query3, 1000).scoreDocs;
114 assertEquals(2, result.length);
115 assertEquals("body:\"(blueberry bluebird) pizza\"", query3.toString());
116
117
118 query3.setSlop(1);
119 result = searcher.search(query3, 1000).scoreDocs;
120
121
122 searcher.explain(query3, 0);
123
124 assertEquals(3, result.length);
125
126
127 MultiPhraseQuery query4 = new MultiPhraseQuery();
128 try {
129 query4.add(new Term("field1", "foo"));
130 query4.add(new Term("field2", "foobar"));
131 fail();
132 } catch (IllegalArgumentException e) {
133
134 }
135
136 writer.close();
137 reader.close();
138 indexStore.close();
139 }
140
141
142 public void testTall() throws IOException {
143 Directory indexStore = newDirectory();
144 RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
145 add("blueberry chocolate pie", writer);
146 add("blueberry chocolate tart", writer);
147 IndexReader r = writer.getReader();
148 writer.close();
149
150 IndexSearcher searcher = newSearcher(r);
151 MultiPhraseQuery q = new MultiPhraseQuery();
152 q.add(new Term("body", "blueberry"));
153 q.add(new Term("body", "chocolate"));
154 q.add(new Term[] {new Term("body", "pie"), new Term("body", "tart")});
155 assertEquals(2, searcher.search(q, 1).totalHits);
156 r.close();
157 indexStore.close();
158 }
159
160 @Ignore
161 public void testMultiSloppyWithRepeats() throws IOException {
162 Directory indexStore = newDirectory();
163 RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
164 add("a b c d e f g h i k", writer);
165 IndexReader r = writer.getReader();
166 writer.close();
167
168 IndexSearcher searcher = newSearcher(r);
169
170 MultiPhraseQuery q = new MultiPhraseQuery();
171
172 q.add(new Term[] {new Term("body", "a"), new Term("body", "b")});
173 q.add(new Term[] {new Term("body", "a")});
174 q.setSlop(6);
175 assertEquals(1, searcher.search(q, 1).totalHits);
176
177 r.close();
178 indexStore.close();
179 }
180
181 public void testMultiExactWithRepeats() throws IOException {
182 Directory indexStore = newDirectory();
183 RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
184 add("a b c d e f g h i k", writer);
185 IndexReader r = writer.getReader();
186 writer.close();
187
188 IndexSearcher searcher = newSearcher(r);
189 MultiPhraseQuery q = new MultiPhraseQuery();
190 q.add(new Term[] {new Term("body", "a"), new Term("body", "d")}, 0);
191 q.add(new Term[] {new Term("body", "a"), new Term("body", "f")}, 2);
192 assertEquals(1, searcher.search(q, 1).totalHits);
193 r.close();
194 indexStore.close();
195 }
196
197 private void add(String s, RandomIndexWriter writer) throws IOException {
198 Document doc = new Document();
199 doc.add(newTextField("body", s, Field.Store.YES));
200 writer.addDocument(doc);
201 }
202
203 public void testBooleanQueryContainingSingleTermPrefixQuery()
204 throws IOException {
205
206
207
208
209 Directory indexStore = newDirectory();
210 RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
211 add("blueberry pie", writer);
212 add("blueberry chewing gum", writer);
213 add("blue raspberry pie", writer);
214
215 IndexReader reader = writer.getReader();
216 IndexSearcher searcher = newSearcher(reader);
217
218 BooleanQuery.Builder q = new BooleanQuery.Builder();
219 q.add(new TermQuery(new Term("body", "pie")), BooleanClause.Occur.MUST);
220
221 MultiPhraseQuery trouble = new MultiPhraseQuery();
222 trouble.add(new Term[] {new Term("body", "blueberry"),
223 new Term("body", "blue")});
224 q.add(trouble, BooleanClause.Occur.MUST);
225
226
227 ScoreDoc[] hits = searcher.search(q.build(), 1000).scoreDocs;
228
229 assertEquals("Wrong number of hits", 2, hits.length);
230
231
232 searcher.explain(q.build(), 0);
233
234 writer.close();
235 reader.close();
236 indexStore.close();
237 }
238
239 public void testPhrasePrefixWithBooleanQuery() throws IOException {
240 Directory indexStore = newDirectory();
241 RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
242 add("This is a test", "object", writer);
243 add("a note", "note", writer);
244
245 IndexReader reader = writer.getReader();
246 IndexSearcher searcher = newSearcher(reader);
247
248
249 BooleanQuery.Builder q = new BooleanQuery.Builder();
250 q.add(new TermQuery(new Term("type", "note")), BooleanClause.Occur.MUST);
251
252 MultiPhraseQuery trouble = new MultiPhraseQuery();
253 trouble.add(new Term("body", "a"));
254 trouble
255 .add(new Term[] {new Term("body", "test"), new Term("body", "this")});
256 q.add(trouble, BooleanClause.Occur.MUST);
257
258
259 ScoreDoc[] hits = searcher.search(q.build(), 1000).scoreDocs;
260 assertEquals("Wrong number of hits", 0, hits.length);
261 writer.close();
262 reader.close();
263 indexStore.close();
264 }
265
266 public void testNoDocs() throws Exception {
267 Directory indexStore = newDirectory();
268 RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
269 add("a note", "note", writer);
270
271 IndexReader reader = writer.getReader();
272 IndexSearcher searcher = newSearcher(reader);
273
274 MultiPhraseQuery q = new MultiPhraseQuery();
275 q.add(new Term("body", "a"));
276 q.add(new Term[] {new Term("body", "nope"), new Term("body", "nope")});
277 assertEquals("Wrong number of hits", 0,
278 searcher.search(q, 1).totalHits);
279
280
281 searcher.explain(q, 0);
282
283 writer.close();
284 reader.close();
285 indexStore.close();
286 }
287
288 public void testHashCodeAndEquals() {
289 MultiPhraseQuery query1 = new MultiPhraseQuery();
290 MultiPhraseQuery query2 = new MultiPhraseQuery();
291
292 assertEquals(query1.hashCode(), query2.hashCode());
293 assertEquals(query1, query2);
294
295 Term term1 = new Term("someField", "someText");
296
297 query1.add(term1);
298 query2.add(term1);
299
300 assertEquals(query1.hashCode(), query2.hashCode());
301 assertEquals(query1, query2);
302
303 Term term2 = new Term("someField", "someMoreText");
304
305 query1.add(term2);
306
307 assertFalse(query1.hashCode() == query2.hashCode());
308 assertFalse(query1.equals(query2));
309
310 query2.add(term2);
311
312 assertEquals(query1.hashCode(), query2.hashCode());
313 assertEquals(query1, query2);
314 }
315
316 private void add(String s, String type, RandomIndexWriter writer)
317 throws IOException {
318 Document doc = new Document();
319 doc.add(newTextField("body", s, Field.Store.YES));
320 doc.add(newStringField("type", type, Field.Store.NO));
321 writer.addDocument(doc);
322 }
323
324
325 public void testEmptyToString() {
326 new MultiPhraseQuery().toString();
327 }
328
329 public void testCustomIDF() throws Exception {
330 Directory indexStore = newDirectory();
331 RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
332 add("This is a test", "object", writer);
333 add("a note", "note", writer);
334
335 IndexReader reader = writer.getReader();
336 IndexSearcher searcher = newSearcher(reader);
337 searcher.setSimilarity(new DefaultSimilarity() {
338 @Override
339 public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
340 return Explanation.match(10f, "just a test");
341 }
342 });
343
344 MultiPhraseQuery query = new MultiPhraseQuery();
345 query.add(new Term[] { new Term("body", "this"), new Term("body", "that") });
346 query.add(new Term("body", "is"));
347 Weight weight = query.createWeight(searcher, true);
348 assertEquals(10f * 10f, weight.getValueForNormalization(), 0.001f);
349
350 writer.close();
351 reader.close();
352 indexStore.close();
353 }
354
355 public void testZeroPosIncr() throws IOException {
356 Directory dir = new RAMDirectory();
357 final Token[] tokens = new Token[3];
358 tokens[0] = new Token();
359 tokens[0].append("a");
360 tokens[0].setPositionIncrement(1);
361 tokens[1] = new Token();
362 tokens[1].append("b");
363 tokens[1].setPositionIncrement(0);
364 tokens[2] = new Token();
365 tokens[2].append("c");
366 tokens[2].setPositionIncrement(0);
367
368 RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
369 Document doc = new Document();
370 doc.add(new TextField("field", new CannedTokenStream(tokens)));
371 writer.addDocument(doc);
372 doc = new Document();
373 doc.add(new TextField("field", new CannedTokenStream(tokens)));
374 writer.addDocument(doc);
375 IndexReader r = writer.getReader();
376 writer.close();
377 IndexSearcher s = newSearcher(r);
378 MultiPhraseQuery mpq = new MultiPhraseQuery();
379
380
381
382
383
384
385
386
387 if (true) {
388 mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
389 mpq.add(new Term[] {new Term("field", "a")}, 0);
390 } else {
391 mpq.add(new Term[] {new Term("field", "a")}, 0);
392 mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
393 }
394 TopDocs hits = s.search(mpq, 2);
395 assertEquals(2, hits.totalHits);
396 assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5);
397
398
399
400
401
402
403 r.close();
404 dir.close();
405 }
406
407 private static Token makeToken(String text, int posIncr) {
408 final Token t = new Token();
409 t.append(text);
410 t.setPositionIncrement(posIncr);
411 return t;
412 }
413
414 private final static Token[] INCR_0_DOC_TOKENS = new Token[] {
415 makeToken("x", 1),
416 makeToken("a", 1),
417 makeToken("1", 0),
418 makeToken("m", 1),
419 makeToken("b", 1),
420 makeToken("1", 0),
421 makeToken("n", 1),
422 makeToken("c", 1),
423 makeToken("y", 1)
424 };
425
426 private final static Token[] INCR_0_QUERY_TOKENS_AND = new Token[] {
427 makeToken("a", 1),
428 makeToken("1", 0),
429 makeToken("b", 1),
430 makeToken("1", 0),
431 makeToken("c", 1)
432 };
433
434 private final static Token[][] INCR_0_QUERY_TOKENS_AND_OR_MATCH = new Token[][] {
435 { makeToken("a", 1) },
436 { makeToken("x", 1), makeToken("1", 0) },
437 { makeToken("b", 2) },
438 { makeToken("x", 2), makeToken("1", 0) },
439 { makeToken("c", 3) }
440 };
441
442 private final static Token[][] INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN = new Token[][] {
443 { makeToken("x", 1) },
444 { makeToken("a", 1), makeToken("1", 0) },
445 { makeToken("x", 2) },
446 { makeToken("b", 2), makeToken("1", 0) },
447 { makeToken("c", 3) }
448 };
449
450
451
452
453
454 public void testZeroPosIncrSloppyParsedAnd() throws IOException {
455 MultiPhraseQuery q = new MultiPhraseQuery();
456 q.add(new Term[]{ new Term("field", "a"), new Term("field", "1") }, -1);
457 q.add(new Term[]{ new Term("field", "b"), new Term("field", "1") }, 0);
458 q.add(new Term[]{ new Term("field", "c") }, 1);
459 doTestZeroPosIncrSloppy(q, 0);
460 q.setSlop(1);
461 doTestZeroPosIncrSloppy(q, 0);
462 q.setSlop(2);
463 doTestZeroPosIncrSloppy(q, 1);
464 }
465
466 private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
467 Directory dir = newDirectory();
468 IndexWriterConfig cfg = newIndexWriterConfig(null);
469 IndexWriter writer = new IndexWriter(dir, cfg);
470 Document doc = new Document();
471 doc.add(new TextField("field", new CannedTokenStream(INCR_0_DOC_TOKENS)));
472 writer.addDocument(doc);
473 IndexReader r = DirectoryReader.open(writer,false);
474 writer.close();
475 IndexSearcher s = newSearcher(r);
476
477 if (VERBOSE) {
478 System.out.println("QUERY=" + q);
479 }
480
481 TopDocs hits = s.search(q, 1);
482 assertEquals("wrong number of results", nExpected, hits.totalHits);
483
484 if (VERBOSE) {
485 for(int hit=0;hit<hits.totalHits;hit++) {
486 ScoreDoc sd = hits.scoreDocs[hit];
487 System.out.println(" hit doc=" + sd.doc + " score=" + sd.score);
488 }
489 }
490
491 r.close();
492 dir.close();
493 }
494
495
496
497
498 public void testZeroPosIncrSloppyPqAnd() throws IOException {
499 PhraseQuery.Builder builder = new PhraseQuery.Builder();
500 int pos = -1;
501 for (Token tap : INCR_0_QUERY_TOKENS_AND) {
502 pos += tap.getPositionIncrement();
503 builder.add(new Term("field", tap.toString()), pos);
504 }
505 builder.setSlop(0);
506 doTestZeroPosIncrSloppy(builder.build(), 0);
507 builder.setSlop(1);
508 doTestZeroPosIncrSloppy(builder.build(), 0);
509 builder.setSlop(2);
510 doTestZeroPosIncrSloppy(builder.build(), 1);
511 }
512
513
514
515
516 public void testZeroPosIncrSloppyMpqAnd() throws IOException {
517 final MultiPhraseQuery mpq = new MultiPhraseQuery();
518 int pos = -1;
519 for (Token tap : INCR_0_QUERY_TOKENS_AND) {
520 pos += tap.getPositionIncrement();
521 mpq.add(new Term[]{new Term("field",tap.toString())}, pos);
522 }
523 doTestZeroPosIncrSloppy(mpq, 0);
524 mpq.setSlop(1);
525 doTestZeroPosIncrSloppy(mpq, 0);
526 mpq.setSlop(2);
527 doTestZeroPosIncrSloppy(mpq, 1);
528 }
529
530
531
532
533 public void testZeroPosIncrSloppyMpqAndOrMatch() throws IOException {
534 final MultiPhraseQuery mpq = new MultiPhraseQuery();
535 for (Token tap[] : INCR_0_QUERY_TOKENS_AND_OR_MATCH) {
536 Term[] terms = tapTerms(tap);
537 final int pos = tap[0].getPositionIncrement()-1;
538 mpq.add(terms, pos);
539 }
540 doTestZeroPosIncrSloppy(mpq, 0);
541 mpq.setSlop(1);
542 doTestZeroPosIncrSloppy(mpq, 0);
543 mpq.setSlop(2);
544 doTestZeroPosIncrSloppy(mpq, 1);
545 }
546
547
548
549
550 public void testZeroPosIncrSloppyMpqAndOrNoMatch() throws IOException {
551 final MultiPhraseQuery mpq = new MultiPhraseQuery();
552 for (Token tap[] : INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN) {
553 Term[] terms = tapTerms(tap);
554 final int pos = tap[0].getPositionIncrement()-1;
555 mpq.add(terms, pos);
556 }
557 doTestZeroPosIncrSloppy(mpq, 0);
558 mpq.setSlop(2);
559 doTestZeroPosIncrSloppy(mpq, 0);
560 }
561
562 private Term[] tapTerms(Token[] tap) {
563 Term[] terms = new Term[tap.length];
564 for (int i=0; i<terms.length; i++) {
565 terms[i] = new Term("field",tap[i].toString());
566 }
567 return terms;
568 }
569
570 public void testNegativeSlop() throws Exception {
571 MultiPhraseQuery query = new MultiPhraseQuery();
572 query.add(new Term("field", "two"));
573 query.add(new Term("field", "one"));
574 try {
575 query.setSlop(-2);
576 fail("didn't get expected exception");
577 } catch (IllegalArgumentException expected) {
578
579 }
580 }
581
582 }